
%% read data
load ../../data/data.txt

data(data==-1)=nan;

id=data(:,1);
married=data(:,2);
age=data(:,3);
W_m_tau_m=data(:,4);
W_f_tau_f=data(:,5);
p_g=data(:,6);
P_c_Y_c=data(:,7);
pbar_X=data(:,8);
W_m=data(:,9);
W_f=data(:,10);
p=data(:,11);
P_c=data(:,12);
tau_m=data(:,13);
tau_f=data(:,14);
g=data(:,15);
Y_c=data(:,16);
hrs_m=data(:,17);
hrs_f=data(:,18);
W=data(:,19);
nkid=data(:,20);
nkid_0_5=data(:,21);
edu4_m=data(:,22); % original education category (used only for regressors)
edu4_f=data(:,23);
type=data(:,24);

pbar_X_data=pbar_X;

N=size(age,1);

% 2 categories for education
edu_m=1*(edu4_m==1 | edu4_m==2)+2*(edu4_m==3 | edu4_m==4);
edu_f=1*(edu4_f==1 | edu4_f==2)+2*(edu4_f==3 | edu4_f==4);

% annual full income (100 hours per week times 52 weeks)
W=W*5200;

% indicators for single and married
j=zeros(2,2);

for i=1:N
    
    % index for marital status and education
    m=married(i)+1; % 1 for single, 2 for married
    e=edu_m(i);
    
    j(m,e)=j(m,e)+1;
    eval(['ind' int2str(m) int2str(e) '(j(m,e))=i;']);
    
end

%% target for calibration

insample_data=(P_c_Y_c>0 & pbar_X>0); % positive and non-missing expenditure

% select families with positive childcare expenditures
tau_m(insample_data==0)=nan;
hrs_m(insample_data==0)=nan;
hrs_f(insample_data==0)=nan;

for m=1:2
    for e=1:2
        
        % expenditure shares
        eval(['share_data(m,e,1)=mean(W_m_tau_m(ind' int2str(m) ...
              int2str(e) ')./pbar_X(ind' int2str(m) int2str(e) '),''omitnan'');']);

        eval(['share_data(m,e,2)=mean(W_f_tau_f(ind' int2str(m) ...
              int2str(e) ')./pbar_X(ind' int2str(m) int2str(e) '),''omitnan'');']);

        eval(['share_data(m,e,3)=mean(p_g(ind' int2str(m) ...
              int2str(e) ')./pbar_X(ind' int2str(m) int2str(e) '),''omitnan'');']);

         eval(['share_data(m,e,4)=mean(P_c_Y_c(ind' int2str(m) ...
               int2str(e) ')./pbar_X(ind' int2str(m) int2str(e) '),''omitnan'');']);

        % levels
        eval(['level_data(m,e,1)=mean(tau_m(ind' int2str(m) ...
               int2str(e) '),''omitnan'');']);
        
        eval(['level_data(m,e,2)=mean(hrs_m(ind' int2str(m) ...
              int2str(e) '),''omitnan'');']);

        eval(['level_data(m,e,3)=mean(hrs_f(ind' int2str(m) ...
              int2str(e) '),''omitnan'');']);
        
    end
end

clear tab
tab(1,:)=share_data(1,:,1);
tab(2,:)=share_data(1,:,3);
tab(3,:)=share_data(2,:,1);
tab(4,:)=share_data(2,:,2);
tab(5,:)=share_data(2,:,3);
csvwrite('../temp/share_data.csv',tab)

clear tab
tab(1,:)=level_data(1,:,1);
tab(2,:)=level_data(1,:,2);
tab(3,:)=level_data(2,:,1);
tab(4,:)=level_data(2,:,2);
tab(5,:)=level_data(2,:,3);
csvwrite('../temp/level_data.csv',tab)
